// PIRA Ethnicity

// Close any log left open by an earlier (possibly aborted) run. -capture- makes this a
// no-op when no log is open, so -log using- below cannot stop with "log file already open".
capture log close
log using PIRAEthnicity.log, replace

// Data source: Project Implicit Demo Website Datasets, Race IAT. Last updated: 2025-02-05 09:04 PM.
// Date accessed: March 10, 2025. The SPSS version of the dataset was downloaded and then,
// without alterations, converted to a Stata file.
//
// Only the variables needed for this analysis are read from disk (-use varlist using-),
// instead of loading the full file and dropping the rest afterwards.
// -clear- discards whatever is currently in memory, so the script can be re-run in the
// same session even though it modifies the data it loads.
use politicalid7 birthyear birthSex raceomb002 edu_14 occuSelfDetail ///
    imsems1 imsems2 imsems3 imsems4 imsems5 imsems6 imsems7 imsems8 imsems9 imsems10 ///
    mcpr1 mcpr3 mcpr4 mcpr6 mcpr10 mcpr11 mcpr12 mcpr13 mcpr14 mcpr15 ///
    mcpr2 mcpr5 mcpr8 mcpr9 mcpr16 ///
    using "C:\Users\sbstjp\OneDrive - Cardiff University\RaceIAT.public.2023.dta", clear

// Reverse-score the listed items before they enter a scale.
// Each reversed copy is stored as rev<item> and equals (observed maximum + 1) minus the item,
// so missing responses stay missing.
foreach item of varlist imsems7 mcpr2 mcpr5 mcpr8 mcpr9 mcpr16 {
    * -meanonly- skips the variance calculation; r(max) is still returned
    quietly summarize `item', meanonly
    generate rev`item' = r(max) + 1 - `item'
}

// Create scale 1 (ImsemsScale): mean of the answered, rescaled imsems items

* Items in the scale; imsems7 enters in its reverse-scored form (revimsems7)
local im_items imsems1 imsems2 imsems3 imsems4 imsems5 imsems6 revimsems7 imsems8 imsems9 imsems10

* Rescale every item to the 1-2 interval using its observed minimum and maximum.
* The rescaled copy is stored as s<item>; missing responses stay missing.
* The names of the rescaled variables are collected in im_std for the row functions below.
local im_std
foreach var of local im_items {
    summarize `var'
    gen s`var' = 1 + (`var' - r(min)) / (r(max) - r(min))
    local im_std `im_std' s`var'
}

* Row-wise summaries over the rescaled items. The egen row functions skip missing values,
* so there is no need to overwrite missing responses with a 0 placeholder first:
*   total_scoreIM   - sum of the answered items (0 when none was answered)
*   count_nonzeroIM - number of answered items (name kept from the earlier 0-placeholder version)
*   ImsemsScale     - mean of the answered items; missing when no item was answered
egen total_scoreIM   = rowtotal(`im_std')
egen count_nonzeroIM = rownonmiss(`im_std')
egen ImsemsScale     = rowmean(`im_std')

// Create scale 2 (McprScale): mean of the answered, rescaled mcpr items

* Items in the scale; mcpr2, mcpr5, mcpr8, mcpr9 and mcpr16 enter in reverse-scored form
local mp_items mcpr1 mcpr3 mcpr4 mcpr6 mcpr10 mcpr11 mcpr12 mcpr13 mcpr14 mcpr15 revmcpr2 revmcpr5 revmcpr8 revmcpr9 revmcpr16

* Rescale every item to the 1-2 interval using its observed minimum and maximum.
* The rescaled copy is stored as s<item>; missing responses stay missing.
* The names of the rescaled variables are collected in mp_std for the row functions below.
local mp_std
foreach var of local mp_items {
    summarize `var'
    gen s`var' = 1 + (`var' - r(min)) / (r(max) - r(min))
    local mp_std `mp_std' s`var'
}

* Row-wise summaries over the rescaled items. The egen row functions skip missing values,
* so there is no need to overwrite missing responses with a 0 placeholder first:
*   total_scoreMP   - sum of the answered items (0 when none was answered)
*   count_nonzeroMP - number of answered items (name kept from the earlier 0-placeholder version)
*   McprScale       - mean of the answered items; missing when no item was answered
egen total_scoreMP   = rowtotal(`mp_std')
egen count_nonzeroMP = rownonmiss(`mp_std')
egen McprScale       = rowmean(`mp_std')

// Demographics

* Age in 2023, derived from year of birth
generate age = 2023 - birthyear

* This is a convenience sample that under-18s have also completed. The study is about
* voters, so respondents born after 2005 (age below 18) are removed. Respondents with a
* missing year of birth are removed in the same step: Stata treats missing as larger than
* any number, so a plain "birthyear > 2005" test removes them as well.
drop if age < 18 | missing(age)

* Ages above 100 are treated as probably misreported and removed
drop if age > 100

*Rename variables and create dummies

* birthSex is only renamed here, its values are not recoded
* NOTE(review): the raking section of this script treats 1 as male and 2 as female - confirm against the codebook
rename birthSex FemaleGender

* Graduate: 1 for edu_14 codes 7-14, 0 for codes 1-6, missing otherwise
generate Graduate = inrange(edu_14, 7, 14) if inrange(edu_14, 1, 6) | inrange(edu_14, 7, 14)

* One 0/1 indicator per ethnic group, built from raceomb002:
*   White = code 6, Black = code 5, EastAsian = code 2, SouthAsian = code 3.
* Respondents with any other code from 1 to 8 get 0; everything else stays missing.
* Assumes raceomb002 only holds integer codes - TODO confirm
local eth_names White Black EastAsian SouthAsian
local eth_codes 6     5     2         3
forvalues i = 1/4 {
    local nm : word `i' of `eth_names'
    local cd : word `i' of `eth_codes'
    generate `nm' = (raceomb002 == `cd') if inlist(raceomb002, 1, 2, 3, 4, 5, 6, 7, 8)
}

*Sociocultural employment variable

* occuSelfDetail is a string code; remove the hyphens and convert it to a number
generate occuSelfDetail_clean = real(subinstr(occuSelfDetail, "-", "", .))

generate SocCulEmployment = .

* Occupation code bands (lower bound, upper bound) coded 0.
* A band with equal bounds selects a single code.
* The band 451000-459000 is listed once; repeating it would not change any value.
foreach band in "431000 439000" "131000 132000" "151000 153000" "171000 173000" ///
                "471000 475000" "451000 459000" "351000 359000" "371000 373000" ///
                "110000 119000" "551000 553000" "511000 519000" "331000 332000" ///
                "339000 339000" "491000 499000" "411000 419000" "191000 192000" ///
                "531000 537000" "194000 194000" {
    local lo : word 1 of `band'
    local hi : word 2 of `band'
    replace SocCulEmployment = 0 if inrange(occuSelfDetail_clean, `lo', `hi')
}

* Occupation code bands coded 1
foreach band in "271000 274000" "251000 259000" "291000 292000" "311000 319000" ///
                "231000 232000" "391000 399000" "211000 212000" "333000 333000" ///
                "193000 193000" {
    local lo : word 1 of `band'
    local hi : word 2 of `band'
    replace SocCulEmployment = 1 if inrange(occuSelfDetail_clean, `lo', `hi')
}

// Standardize
egen Age = std(age)

// Create a weight
* Every respondent starts with a weight of 1; raking adjusts it below
generate weight = 1

* Code age into categories - the other variables are already in such categories
recode age (min/24=1 "0-24") ///
           (25/34=2 "25-34") ///
           (35/44=3 "35-44") ///
           (45/54=4 "45-54") ///
           (55/max=5 "55+"), ///
           generate(age_group)

* Target shares for each raking margin.
* The below are based on census data, except for ideology, which is based on data from the ANES24 pilot study

* Sex: 1 = Male, 2 = Female
generate sextot = cond(FemaleGender == 1, 0.49, cond(FemaleGender == 2, 0.51, .))

* Age group: 1 = 18-24, 2 = 25-34, 3 = 35-44, 4 = 45-54, 5 = 55+
generate agetot = .
local age_shares 0.12 0.17 0.17 0.16 0.38
forvalues g = 1/5 {
    local share : word `g' of `age_shares'
    replace agetot = `share' if age_group == `g'
}

* Ideology: 1 = Strongly conservative, 2 = Moderately conservative, 3 = Slightly conservative,
* 4 = Neutral, 5 = Slightly liberal, 6 = Moderately liberal, 7 = Strongly liberal
generate idtot = .
local id_shares 0.08 0.17 0.08 0.33 0.08 0.18 0.08
forvalues g = 1/7 {
    local share : word `g' of `id_shares'
    replace idtot = `share' if politicalid7 == `g'
}

* Education: 1 = High school and below (edu_14 1-4), 2 = Some college (5-6),
* 3 = Bachelors (7), 4 = Graduate (8-14)
generate edcats = cond(inrange(edu_14, 1, 4), 1, ///
                  cond(inrange(edu_14, 5, 6), 2, ///
                  cond(edu_14 == 7, 3, ///
                  cond(inrange(edu_14, 8, 14), 4, .))))

generate edtot = .
local ed_shares 0.3875 0.2644 0.2208 0.1273
forvalues g = 1/4 {
    local share : word `g' of `ed_shares'
    replace edtot = `share' if edcats == `g'
}

* Ethnicity: 1 = American Indian/Alaska Native, 2 = Asian (raceomb002 codes 2 and 3 merged),
* 3 = Native Hawaiian, 4 = Black, 5 = White, 6 = Other, 7 = Multiracial.
* Codes 4-8 shift down by one because codes 2 and 3 share a category.
generate ethcats = cond(raceomb002 == 1, 1, ///
                   cond(inrange(raceomb002, 2, 3), 2, ///
                   cond(inlist(raceomb002, 4, 5, 6, 7, 8), raceomb002 - 1, .)))

generate ethtot = .
local eth_shares 0.01 0.0599 0.002 0.1213 0.6051 0.0742 0.1276
forvalues g = 1/7 {
    local share : word `g' of `eth_shares'
    replace ethtot = `share' if ethcats == `g'
}

* Rake the unit weight to the five margins; the result is stored in rakedweight
survwgt rake weight , by(FemaleGender age_group politicalid7 ethcats edcats) totvars(sextot agetot idtot ethtot edtot) generate(rakedweight)

// Regressions and correlations

* Remove estimates stored earlier in this Stata session. -eststo- appends to the stored
* set, so without this a re-run would make -esttab- show models from previous runs too.
eststo clear

* One weighted regression of McprScale per ethnicity dummy, with the same controls each time.
* Commands inside a loop are not echoed to the log, so each model gets a header line.
foreach group in White Black EastAsian SouthAsian {
    display as text _newline "Model with ethnicity dummy: `group'"
    regress McprScale Age FemaleGender Graduate `group' [pweight=rakedweight], robust
    eststo
}

* Side-by-side table of the four stored models
esttab

* Weighted pairwise correlations between the scale and the ethnicity dummies, with significance levels
pwcorr McprScale White Black EastAsian SouthAsian [aweight=rakedweight], sig

log close